package com.gxuwz.crawlers;

import java.util.logging.Level;
import java.util.logging.Logger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Small helper around jsoup that downloads a page and parses it into a
 * {@link Document}.
 */
public class BasicJsoupCrawler {

	private static final Logger LOGGER = Logger.getLogger(BasicJsoupCrawler.class.getName());

	/** User-Agent header sent with every request. */
	private static final String USER_AGENT =
			"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)";

	/** Timeout, in milliseconds, applied by {@link #start(String)}. */
	private static final int DEFAULT_TIMEOUT_MILLIS = 5000;

	/**
	 * Starts fetching data: downloads and parses the page at {@code url} using
	 * the default timeout of {@value #DEFAULT_TIMEOUT_MILLIS} ms.
	 *
	 * <p>Note carried over from the original author: crawling starts from the
	 * Beijing institutions page, so a link for Beijing institutions has to be
	 * added to the database manually beforehand.
	 *
	 * @param url address of the page to fetch
	 * @return the parsed document, or {@code null} if the page could not be
	 *         fetched or parsed for any reason (the failure is logged)
	 */
	public static Document start(String url) {
		return start(url, DEFAULT_TIMEOUT_MILLIS);
	}

	/**
	 * Downloads and parses the page at {@code url} with a caller-supplied
	 * timeout.
	 *
	 * @param url           address of the page to fetch
	 * @param timeoutMillis timeout in milliseconds, handed unchanged to
	 *                      jsoup's {@code Connection.timeout(int)}
	 * @return the parsed document, or {@code null} if the page could not be
	 *         fetched or parsed for any reason (the failure is logged)
	 */
	public static Document start(String url, int timeoutMillis) {
		try {
			return Jsoup.connect(url)
					.userAgent(USER_AGENT)
					.timeout(timeoutMillis)
					.get();
		} catch (Exception e) {
			// Deliberately broad: this method is best-effort and signals every
			// failure (bad URL, I/O error, HTTP error status, ...) by returning
			// null rather than throwing. Log the URL so the failing page can be
			// identified, and keep the cause for diagnosis.
			LOGGER.log(Level.WARNING, "Failed to fetch " + url, e);
			return null;
		}
	}

}
